{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "073d7167",
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip3 install transformers -U"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3a790239",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6a0305bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import LlavaForConditionalGeneration, LlavaConfig, CLIPVisionConfig, LlamaConfig\n",
    "from transformers import CLIPVisionModel, CLIPImageProcessor\n",
    "from transformers import MistralConfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a12a75bf",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /openai/clip-vit-large-patch14-336/resolve/main/config.json HTTP/1.1\" 200 0\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "CLIPVisionConfig {\n",
       "  \"attention_dropout\": 0.0,\n",
       "  \"dropout\": 0.0,\n",
       "  \"hidden_act\": \"quick_gelu\",\n",
       "  \"hidden_size\": 1024,\n",
       "  \"image_size\": 336,\n",
       "  \"initializer_factor\": 1.0,\n",
       "  \"initializer_range\": 0.02,\n",
       "  \"intermediate_size\": 4096,\n",
       "  \"layer_norm_eps\": 1e-05,\n",
       "  \"model_type\": \"clip_vision_model\",\n",
       "  \"num_attention_heads\": 16,\n",
       "  \"num_channels\": 3,\n",
       "  \"num_hidden_layers\": 24,\n",
       "  \"patch_size\": 14,\n",
       "  \"projection_dim\": 768,\n",
       "  \"transformers_version\": \"4.36.0\"\n",
       "}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vision_config = CLIPVisionConfig.from_pretrained('openai/clip-vit-large-patch14-336')\n",
    "vision_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "cca57e0b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/config.json HTTP/1.1\" 200 0\n"
     ]
    }
   ],
   "source": [
    "text_config = MistralConfig.from_pretrained('mesolitica/malaysian-mistral-7b-32k-instructions-v2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "16efafa5",
   "metadata": {},
   "outputs": [],
   "source": [
    "configuration = LlavaConfig(vision_config, text_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "6b5721f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LlavaForConditionalGeneration(configuration)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "092795ef",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /openai/clip-vit-large-patch14-336/resolve/main/config.json HTTP/1.1\" 200 0\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /openai/clip-vit-large-patch14-336/resolve/main/config.json HTTP/1.1\" 200 0\n"
     ]
    }
   ],
   "source": [
    "model.vision_tower = model.vision_tower.from_pretrained('openai/clip-vit-large-patch14-336')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "c5e0943c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/config.json HTTP/1.1\" 200 0\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/adapter_config.json HTTP/1.1\" 404 0\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/config.json HTTP/1.1\" 200 0\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/model.safetensors HTTP/1.1\" 404 0\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/model.safetensors.index.json HTTP/1.1\" 200 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139942327601088 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/b349bc0ce8075a2a5df1e0210450308018d396c8.lock\n",
      "DEBUG:filelock:Lock 139942327601088 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/b349bc0ce8075a2a5df1e0210450308018d396c8.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/model.safetensors.index.json HTTP/1.1\" 200 23950\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ada847c702014e69a7cdc554070c224e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139942327601088 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/b349bc0ce8075a2a5df1e0210450308018d396c8.lock\n",
      "DEBUG:filelock:Lock 139942327601088 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/b349bc0ce8075a2a5df1e0210450308018d396c8.lock\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "273a76a51375409f898ce073b332517a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/model-00001-of-00003.safetensors HTTP/1.1\" 302 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139942327531360 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/c6f693e0949d49dcfb2b59845e8a3f4ea96fc4fb0d176ba02a61b19dd3b422c1.lock\n",
      "DEBUG:filelock:Lock 139942327531360 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/c6f693e0949d49dcfb2b59845e8a3f4ea96fc4fb0d176ba02a61b19dd3b422c1.lock\n",
      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): cdn-lfs-us-1.huggingface.co:443\n",
      "DEBUG:urllib3.connectionpool:https://cdn-lfs-us-1.huggingface.co:443 \"GET /repos/b5/3c/b53c97364a79ebdb64d88797ba08c1713c61d5352f6b2c8cc2f5b88f0e702c0e/c6f693e0949d49dcfb2b59845e8a3f4ea96fc4fb0d176ba02a61b19dd3b422c1?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27model-00001-of-00003.safetensors%3B+filename%3D%22model-00001-of-00003.safetensors%22%3B&Expires=1702791088&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwMjc5MTA4OH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2I1LzNjL2I1M2M5NzM2NGE3OWViZGI2NGQ4ODc5N2JhMDhjMTcxM2M2MWQ1MzUyZjZiMmM4Y2MyZjViODhmMGU3MDJjMGUvYzZmNjkzZTA5NDlkNDlkY2ZiMmI1OTg0NWU4YTNmNGVhOTZmYzRmYjBkMTc2YmEwMmE2MWIxOWRkM2I0MjJjMT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=l-Vlcg90SAfNVUQnnDL7qE7~54xsDDLRTO6HKNYSyoqv~RwdblLdnzcagJTae0ceTKWNOHtLv48CNEhkkKkmh1cd5nEZW4IstIE1ZAPVae5PNglMddvI3oTktVp27rcgF81WeVLxKIUrqejonFughN01DHqEY0b5WveJr7KFlPvZ~mmG6meBnhG6oEyVA5KAs64Qu6a9p7C8qG4MnQGzYnwIa9SY7XimK7qqCO0k2zUDzPfiuHOz8agCKH8YGGcJ5h1dosb9WFdJSIarhKjvcHZjwwycDOSo6EE1farKBptsIdoPXZyoqj6eaROBCV70cKMIgMIHMNTq~vdQsyzVRA__&Key-Pair-Id=KCD77M1F0VK2B HTTP/1.1\" 200 4943162336\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f32914bd96b84b66bdb157ab9be7b63a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139942327531360 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/c6f693e0949d49dcfb2b59845e8a3f4ea96fc4fb0d176ba02a61b19dd3b422c1.lock\n",
      "DEBUG:filelock:Lock 139942327531360 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/c6f693e0949d49dcfb2b59845e8a3f4ea96fc4fb0d176ba02a61b19dd3b422c1.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/model-00002-of-00003.safetensors HTTP/1.1\" 302 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139942329600896 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/94b7061f6f31b582e6f808c4912be2ff6db3704505e2e791c81c25f72636b523.lock\n",
      "DEBUG:filelock:Lock 139942329600896 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/94b7061f6f31b582e6f808c4912be2ff6db3704505e2e791c81c25f72636b523.lock\n",
      "DEBUG:urllib3.connectionpool:https://cdn-lfs-us-1.huggingface.co:443 \"GET /repos/b5/3c/b53c97364a79ebdb64d88797ba08c1713c61d5352f6b2c8cc2f5b88f0e702c0e/94b7061f6f31b582e6f808c4912be2ff6db3704505e2e791c81c25f72636b523?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27model-00002-of-00003.safetensors%3B+filename%3D%22model-00002-of-00003.safetensors%22%3B&Expires=1702791107&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwMjc5MTEwN319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2I1LzNjL2I1M2M5NzM2NGE3OWViZGI2NGQ4ODc5N2JhMDhjMTcxM2M2MWQ1MzUyZjZiMmM4Y2MyZjViODhmMGU3MDJjMGUvOTRiNzA2MWY2ZjMxYjU4MmU2ZjgwOGM0OTEyYmUyZmY2ZGIzNzA0NTA1ZTJlNzkxYzgxYzI1ZjcyNjM2YjUyMz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=WflvWVCK8XQr-2K0DZpKUjSFgAhuoPjlZQrK5xx5sotr3vSfg55D7DJlZUzLWCyUvr0Lh3nogqQljASRp8z1nSAYjMICHvxEh6RZb70d8XF0bFqnu~19gWd9kWFzINH2WP9Ow~u2Qj7V-cB-iuyxU-7fKq5uikr2XChm9WmN2bnDrZX4tu1GW2WMHUNSxXFtg1sm05TYZLoQX1ZclPRCuoKkXh0NY6-6iVXJExGwug~2LeH3T5MtcoQtJZQ~O-f~S8MMq5wK23Ylvc83GLIPEYfgE7Iz4G9NSHQyhID~s~ucq~mX5DTJDBFqOWcLUYDN0UmWavjfSp-8DEDvjKpGaQ__&Key-Pair-Id=KCD77M1F0VK2B HTTP/1.1\" 200 4999819336\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "25a5a059158b4979bd1f6118e1eb971c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139942329600896 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/94b7061f6f31b582e6f808c4912be2ff6db3704505e2e791c81c25f72636b523.lock\n",
      "DEBUG:filelock:Lock 139942329600896 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/94b7061f6f31b582e6f808c4912be2ff6db3704505e2e791c81c25f72636b523.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/model-00003-of-00003.safetensors HTTP/1.1\" 302 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139942327532992 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/04e8d005464aa12e82d7ba8f652e6cc1a9b01fe3277581b44bec5f7b442ba0f1.lock\n",
      "DEBUG:filelock:Lock 139942327532992 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/04e8d005464aa12e82d7ba8f652e6cc1a9b01fe3277581b44bec5f7b442ba0f1.lock\n",
      "DEBUG:urllib3.connectionpool:https://cdn-lfs-us-1.huggingface.co:443 \"GET /repos/b5/3c/b53c97364a79ebdb64d88797ba08c1713c61d5352f6b2c8cc2f5b88f0e702c0e/04e8d005464aa12e82d7ba8f652e6cc1a9b01fe3277581b44bec5f7b442ba0f1?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27model-00003-of-00003.safetensors%3B+filename%3D%22model-00003-of-00003.safetensors%22%3B&Expires=1702791235&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwMjc5MTIzNX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2I1LzNjL2I1M2M5NzM2NGE3OWViZGI2NGQ4ODc5N2JhMDhjMTcxM2M2MWQ1MzUyZjZiMmM4Y2MyZjViODhmMGU3MDJjMGUvMDRlOGQwMDU0NjRhYTEyZTgyZDdiYThmNjUyZTZjYzFhOWIwMWZlMzI3NzU4MWI0NGJlYzVmN2I0NDJiYTBmMT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=EvWKBI-8MQT21fZUXtf3rewKLYq7nE5WZDMcGXuq1tBLxEgTfTDhf~LdonIwkjN3CzSamf559uNasvP~ZVS-IZIjEkfck3b8sMB91cJI5A1Pe4u84LWzuzTKma7Sz79rXS2TbPcmIay17usoP~aF11XkGegelnBBvkmb1M5iR~Y3KkUMC1C6Ycb3Lz9~SBxB34F9Ru0-HE2-yx88KB-CDf74WrwDr0UYlEyOnD1YJpPG9es5vmaI0onRKtJezcOqjK4clUr9FG6Qs~RWCGMCmRaKu50rf4d9hVgKI2ZQGJHFqJ95oWJXVK90vBTIIrOvSg6ITA0Fty0WLhz~GaJ5Aw__&Key-Pair-Id=KCD77M1F0VK2B HTTP/1.1\" 200 4540516344\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c6f130e42c314686a4fcff673fdb4347",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139942327532992 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/04e8d005464aa12e82d7ba8f652e6cc1a9b01fe3277581b44bec5f7b442ba0f1.lock\n",
      "DEBUG:filelock:Lock 139942327532992 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/04e8d005464aa12e82d7ba8f652e6cc1a9b01fe3277581b44bec5f7b442ba0f1.lock\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9f65353feffd4b7eabf1a1af176260f6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/generation_config.json HTTP/1.1\" 200 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139942325770160 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/ea800c3a686d48e75a09a9bc30994a0b9da09f2a.lock\n",
      "DEBUG:filelock:Lock 139942325770160 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/ea800c3a686d48e75a09a9bc30994a0b9da09f2a.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/generation_config.json HTTP/1.1\" 200 111\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "70440c04c1a14c09bd385335b67fd835",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139942325770160 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/ea800c3a686d48e75a09a9bc30994a0b9da09f2a.lock\n",
      "DEBUG:filelock:Lock 139942325770160 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--malaysian-mistral-7b-32k-instructions-v2/ea800c3a686d48e75a09a9bc30994a0b9da09f2a.lock\n"
     ]
    }
   ],
   "source": [
    "model.language_model = model.language_model.from_pretrained('mesolitica/malaysian-mistral-7b-32k-instructions-v2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "793d1e2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoProcessor\n",
    "import requests\n",
    "from PIL import Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "e8602f36",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = \"USER: <image>\\nWhat are these?\\nASSISTANT:\"\n",
    "image_file = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "250b47f3",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:Resetting dropped connection: huggingface.co\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/bakLlava-v1-hf/resolve/main/preprocessor_config.json HTTP/1.1\" 200 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139943770667952 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/38f4d736a2e2b34af9eaef607847407c0efc7980.lock\n",
      "DEBUG:filelock:Lock 139943770667952 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/38f4d736a2e2b34af9eaef607847407c0efc7980.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /llava-hf/bakLlava-v1-hf/resolve/main/preprocessor_config.json HTTP/1.1\" 200 505\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ab4d05f01d9e43b2a13a22ed1290723a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "preprocessor_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139943770667952 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/38f4d736a2e2b34af9eaef607847407c0efc7980.lock\n",
      "DEBUG:filelock:Lock 139943770667952 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/38f4d736a2e2b34af9eaef607847407c0efc7980.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/bakLlava-v1-hf/resolve/main/preprocessor_config.json HTTP/1.1\" 200 0\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/bakLlava-v1-hf/resolve/main/preprocessor_config.json HTTP/1.1\" 200 0\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/bakLlava-v1-hf/resolve/main/tokenizer_config.json HTTP/1.1\" 200 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139943417095856 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/9e1fe5baca02dcc6fd027fcc4fccb49ab973548c.lock\n",
      "DEBUG:filelock:Lock 139943417095856 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/9e1fe5baca02dcc6fd027fcc4fccb49ab973548c.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /llava-hf/bakLlava-v1-hf/resolve/main/tokenizer_config.json HTTP/1.1\" 200 1408\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9673745883d941618bac567020ffb21d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/1.41k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139943417095856 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/9e1fe5baca02dcc6fd027fcc4fccb49ab973548c.lock\n",
      "DEBUG:filelock:Lock 139943417095856 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/9e1fe5baca02dcc6fd027fcc4fccb49ab973548c.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/bakLlava-v1-hf/resolve/main/tokenizer.model HTTP/1.1\" 302 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139942329495808 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055.lock\n",
      "DEBUG:filelock:Lock 139942329495808 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055.lock\n",
      "DEBUG:urllib3.connectionpool:Resetting dropped connection: cdn-lfs-us-1.huggingface.co\n",
      "DEBUG:urllib3.connectionpool:https://cdn-lfs-us-1.huggingface.co:443 \"GET /repos/94/8d/948d90c3014806b0f90cb45dfcc4a976890efb4cd7dafa5fc0fca66865db1482/dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27tokenizer.model%3B+filename%3D%22tokenizer.model%22%3B&Expires=1702792010&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwMjc5MjAxMH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzk0LzhkLzk0OGQ5MGMzMDE0ODA2YjBmOTBjYjQ1ZGZjYzRhOTc2ODkwZWZiNGNkN2RhZmE1ZmMwZmNhNjY4NjVkYjE0ODIvZGFkZmQ1NmQ3NjY3MTVjNjFkMmVmNzgwYTUyNWFiNDNiOGU2ZGE0ZGU2ODY1YmRhM2Q5NWZkZWY1ZTEzNDA1NT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=evIQ0VvXI4WdGa-V88AIEbimJg7OHiIQcfGNqhx387LQ~5-pxYOf2-ju0kL07l-jAR40bszj6wDnq1h8RfsrVmWrj8bkNxKZGVneEhA1ZLAJU6sBh-Rhu-8oET5je3fccBNTiHWAX4Odxg-lvLpO3CKsD29mU4-cNx03MC~o5~exBvqlctOslQY8Ih1k5gJnobguZ1iQJUGm6B0OTI28Yhk~dw6qyRxYBpgPk-795AMzMQTktFqUSzOZB4Vg6k9U1F33TnyfaTX-Z-BLiIjSrY7wrwZZ00sbu7B-CH84jQh~zF-1uPXSkGaOX2tyRksD0lpxlEk82pjjhL9DyQMpFA__&Key-Pair-Id=KCD77M1F0VK2B HTTP/1.1\" 200 493443\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2cbed587262e49248dc23441c9b3c293",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139942329495808 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055.lock\n",
      "DEBUG:filelock:Lock 139942329495808 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/bakLlava-v1-hf/resolve/main/tokenizer.json HTTP/1.1\" 200 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139942329496576 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/d997fd35c8cc77291e33f873ba32117a43f054c7.lock\n",
      "DEBUG:filelock:Lock 139942329496576 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/d997fd35c8cc77291e33f873ba32117a43f054c7.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /llava-hf/bakLlava-v1-hf/resolve/main/tokenizer.json HTTP/1.1\" 200 1795668\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a600e851f34a4bb2aecc10be22e740b9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139942329496576 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/d997fd35c8cc77291e33f873ba32117a43f054c7.lock\n",
      "DEBUG:filelock:Lock 139942329496576 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/d997fd35c8cc77291e33f873ba32117a43f054c7.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/bakLlava-v1-hf/resolve/main/added_tokens.json HTTP/1.1\" 200 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139943781933088 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/4bd5222947172917d6a7770e47e797f3253c669a.lock\n",
      "DEBUG:filelock:Lock 139943781933088 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/4bd5222947172917d6a7770e47e797f3253c669a.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /llava-hf/bakLlava-v1-hf/resolve/main/added_tokens.json HTTP/1.1\" 200 41\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e0535d00c9104a1caf13df98b31ec246",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "added_tokens.json:   0%|          | 0.00/41.0 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139943781933088 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/4bd5222947172917d6a7770e47e797f3253c669a.lock\n",
      "DEBUG:filelock:Lock 139943781933088 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/4bd5222947172917d6a7770e47e797f3253c669a.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/bakLlava-v1-hf/resolve/main/special_tokens_map.json HTTP/1.1\" 200 0\n",
      "DEBUG:filelock:Attempting to acquire lock 139942325769584 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/a782b2f1cdab4d0bacb2dc0f85d02c4b1e31f0bd.lock\n",
      "DEBUG:filelock:Lock 139942325769584 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/a782b2f1cdab4d0bacb2dc0f85d02c4b1e31f0bd.lock\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /llava-hf/bakLlava-v1-hf/resolve/main/special_tokens_map.json HTTP/1.1\" 200 552\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "99aa730bc2d347e0a14102e5db566b83",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:filelock:Attempting to release lock 139942325769584 on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/a782b2f1cdab4d0bacb2dc0f85d02c4b1e31f0bd.lock\n",
      "DEBUG:filelock:Lock 139942325769584 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--llava-hf--bakLlava-v1-hf/a782b2f1cdab4d0bacb2dc0f85d02c4b1e31f0bd.lock\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "processor = AutoProcessor.from_pretrained('llava-hf/bakLlava-v1-hf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "791dddb0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): images.cocodataset.org:80\n",
      "DEBUG:urllib3.connectionpool:http://images.cocodataset.org:80 \"GET /val2017/000000039769.jpg HTTP/1.1\" 200 173131\n"
     ]
    }
   ],
   "source": [
    "raw_image = Image.open(requests.get(image_file, stream=True).raw)\n",
    "inputs = processor(prompt, raw_image, return_tensors='pt')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "6ee5cffd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[    1,  2223,   725, 28747, 28705, 32000, 28705,    13,  3195,   460,\n",
       "          1167, 28804,    13,  4816,  8048, 12738, 28747]])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "inputs['input_ids']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "c2b7479d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'<s> USER: <image> \\nWhat are these?\\nASSISTANT:'"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "processor.tokenizer.decode(inputs['input_ids'][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "f048b86c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LlamaTokenizerFast(name_or_path='llava-hf/bakLlava-v1-hf', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={\n",
       "\t0: AddedToken(\"<unk>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t1: AddedToken(\"<s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t32000: AddedToken(\"<image>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t32001: AddedToken(\"<pad>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "}"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "processor.tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "19c0b232",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/malaysian-mistral-7b-32k-instructions-v2/resolve/main/tokenizer_config.json HTTP/1.1\" 200 0\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from transformers import AutoTokenizer\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained('mesolitica/malaysian-mistral-7b-32k-instructions-v2')\n",
    "tokenizer.add_tokens([\"<image>\", \"<pad>\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "6c698cde",
   "metadata": {},
   "outputs": [],
   "source": [
    "processor.tokenizer = tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "8c37f00e",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/repos/create HTTP/1.1\" 200 153\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/huseinzol05/dummy-clip-vit-large-patch14-336-mistral-7b/preupload/main HTTP/1.1\" 200 317\n",
      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/huseinzol05/dummy-clip-vit-large-patch14-336-mistral-7b/commit/main HTTP/1.1\" 200 228\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/huseinzol05/dummy-clip-vit-large-patch14-336-mistral-7b/commit/b2321ff8eb70176991cba00b801c7163345e1e99', commit_message='Upload processor', commit_description='', oid='b2321ff8eb70176991cba00b801c7163345e1e99', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "processor.push_to_hub('huseinzol05/dummy-clip-vit-large-patch14-336-mistral-7b')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "e309616f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "model = model.type(torch.bfloat16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "a45ba399",
   "metadata": {},
   "outputs": [],
   "source": [
    "model.push_to_hub('huseinzol05/dummy-clip-vit-large-patch14-336-mistral-7b')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
